#!/bin/bash
#BSUB -n 1                     # 1 core
#BSUB -W 0:20                   # 20-minute run-time limit
#BSUB -R "rusage[mem=16000]"     # mem=16000 per core (~16 GB if the cluster's LSF memory unit is MB)
#BSUB -R "rusage[ngpus_excl_p=1]"
#BSUB -R "select[gpu_model0==NVIDIATITANRTX]"
#BSUB -o /nfs/iiscratch-zhang.inf.ethz.ch/export/zhang/export/fm/exe_log/gptJ_inf_4RTXTitan.out.%J
#BSUB -e /nfs/iiscratch-zhang.inf.ethz.ch/export/zhang/export/fm/exe_log/gptJ_inf_4RTXTitan.err.%J

# Prepare the job environment: compiler/CUDA modules, conda env, working directory.
module load gcc/6.3.0 cuda/11.0.3             # Load modules from Euler setup
source activate base                          # Activate my conda python environment
# Abort if the project directory cannot be entered: the model path and the
# runner script later in this file are relative, so continuing from the wrong
# directory would only fail later with a less obvious error.
cd /nfs/iiscratch-zhang.inf.ethz.ch/export/zhang/export/fm/GPT-home-private || exit 1     # Change directory

# Record the GPU state visible to this job in the job's output log.
nvidia-smi

# Value forwarded to the runner as --pipeline-group-size.
world_size=4

# Option groups for the runner. Each is a single string that is expanded
# unquoted on the launch line, so it word-splits into separate arguments.
DIST_CONF="--pp-mode pipe_sync_greedy --pipeline-group-size $world_size --cuda-id 0"
MODEL_CONF="--model-type gptj --model-name ./pretrained_models/gpt-j-6B --num-iters 10"
INFERENCE_CONF="--fp16 --batch-size 64 --input-seq-length 512 --generate-seq-length 32 --micro-batch-size 1 --num-layers 7"
COOR_CONF="--coordinator-server-ip 129.132.93.88"

# Point the NCCL and Gloo socket transports at the network interface named "access".
export NCCL_SOCKET_IFNAME=access
export GLOO_SOCKET_IFNAME=access

# Launch the runner with all option groups.
# NOTE(review): --lsf-job-no is passed without a value here; presumably the
# job number is appended to this line by whatever submits the script — confirm
# before running this file directly.
python dist_inference_runner_w_euler_coordinator.py $DIST_CONF $MODEL_CONF $INFERENCE_CONF $COOR_CONF --lsf-job-no